import pandas as pd
import sys
# 面试题3：文本统计工具

# Validate the command-line argument: the first argument must name a CSV file.
csvFile = ".csv"
if len(sys.argv) < 2:
    print("请输入文件名!")
    # sys.exit is always available; the bare exit() helper is injected by the
    # site module and may be missing (e.g. when running with `python -S`).
    sys.exit(1)
fileName = sys.argv[1]

# The name has to END with the extension (case-insensitively). A plain
# substring test would also accept names such as "data.csv.bak".
if not fileName.lower().endswith(csvFile):
    print("请输入正确文件名!")
    sys.exit(1)

# Load the file
data = pd.read_csv(fileName)
print("文件读取成功")

# Reduce each start timestamp to its calendar year
data["year"] = pd.to_datetime(data["Start_Date_Time"]).dt.year

# Collect every distinct offence code
offences = data['Offence Code'].unique()

# Lookup table mapping an offence code to its combined crime name
offenceCode = {}

def lookup_offenceCode(code):
    """Return the crime name stored for *code* in ``offenceCode``.

    Yields ``None`` when the code has no entry in the lookup table.
    """
    return offenceCode.get(code)

# Populate the offence-code lookup table
print("开始装载罪名库!")
# Take one representative row per offence code in a single pass:
# drop_duplicates keeps the first occurrence of each code. Rows whose code is
# missing are skipped, because a NaN code never compares equal to anything and
# looking it up row-by-row would select nothing and fail.
name_columns = ["Crime Name1", "Crime Name2", "Crime Name3"]
first_rows = data.dropna(subset=["Offence Code"]).drop_duplicates(subset="Offence Code")
for code, name1, name2, name3 in zip(
    first_rows["Offence Code"], *(first_rows[col] for col in name_columns)
):
    # Join the three crime-name levels into a single label
    offenceCode[code] = '{}/{}/{}'.format(name1, name2, name3)

# Add the combined crime-name column
data['crime name'] = data["Offence Code"].apply(lookup_offenceCode)

# Total number of victims per city, year and crime name
output = data.groupby(["City", "year", "crime name"])["Victims"].sum().reset_index()
# NOTE(review): with the default settings to_csv also writes the row index as
# an unnamed first column — confirm that this is intended.
output.to_csv("test3_output.csv")
print("文件输出完成")